/*

 Program auxiliary.do prepares some auxiliary files.

 Kumler, Verhoogen, Frias "Enlisting Employees ..." REStat forthcoming

*/

#delimit;
set more off;

*** housekeeping;
do ${code}housekeeping.do;
set mem 1g;
set processors 8;


*****************************************************************************;
*************************** prepare price indices ***************************;
*****************************************************************************;


* Note the CPI and PPI from Banxico do not have the same base year;

use ${raw}banxicopriceindices19732006, clear;

*add intial zero to months with only one digit;
replace fecha="0"+fecha if length(fecha)==6;

gen month=substr(fecha, 1, 2);
gen year=substr(fecha, 4, 4);

destring year, replace;

*assign quarter to last month in quarter and keep these observations;
gen qtr=.;
replace qtr=1 if month=="03";
replace qtr=2 if month=="06";
replace qtr=3 if month=="09";
replace qtr=4 if month=="12";


*** make quarterly version of price deflator data;

preserve;
keep if qtr~=.;

* adjust both cpi and ppi so that June 2002 is base month;
foreach var in cpi ppi
{;
 gen `var'_062002 = `var' if year==2002 & month=="06";
 egen `var'062002 = max(`var'_062002);
 replace `var' = (`var'/`var'062002)*100;
 drop `var'_062002 `var'062002;
};

keep year qtr cpi ppi;
order year qtr cpi ppi;
sort year qtr;
save ${work}banxicopriceindices19732006_qtr, replace;

restore;

*** make annual version (annual average) of price deflator data;

collapse (mean) cpi ppi, by(year);

* adjust both cpi and ppi so that 2002 is base year;
foreach var in cpi ppi
{;
 gen `var'_2002 = `var' if year==2002;
 egen `var'2002 = max(`var'_2002);
 replace `var' = (`var'/`var'2002)*100;
 drop `var'_2002 `var'2002;
};

rename year periodo;
order periodo cpi ppi;
save ${work}banxicopriceindices19732006_yr, replace;

*****************************************************************************;
*************************** prepare minimum wages ***************************;
*****************************************************************************;

*Note: This code calculates minimum wages at the end of each quarter, since
*that is when the data is pulled from IMSS.;

local fyr=1982;
local lyr=2008;
local nyrs = `lyr'-`fyr'+1;
local nobs = 4*`nyrs';

clear;
set obs `nobs';
gen year = ceil(_n/4)+`fyr'-1;
bys year: gen qtr = _n;

gen start_date = .;
replace start_date=mdy(3,30,year) if qtr==1;
replace start_date=mdy(6,30,year) if qtr==2;
replace start_date=mdy(9,30,year) if qtr==3;
replace start_date=mdy(12,30,year) if qtr==4;
format start_date %dN/D/Y;
gen keepdate=1;

append using ${raw}sat_salarios_minimos_1982_2008.dta;
sort start_date;
replace zona_a = zona_a[_n-1] if keepdate==1;
replace zona_b = zona_b[_n-1] if keepdate==1;
replace zona_c = zona_c[_n-1] if keepdate==1;
keep if keepdate==1;

keep year qtr zona_a zona_b zona_c;
rename zona_a zona_1;
rename zona_b zona_2;
rename zona_c zona_3;

reshape long zona_, i(year qtr) j(z);
gen str1 zona = "A" if z==1;
replace zona="B" if z==2;
replace zona="C" if z==3;
rename zona_ salmin;

replace salmin = salmin/1000 if year<=1992;

keep year qtr zona salmin;
order year qtr zona salmin;
sort year qtr zona;
save ${work}minwages.dta, replace;

*****************************************************************************;
***************************** prepare top-codes *****************************;
*****************************************************************************;
clear;
use ${work}minwages.dta;

* merge in cpi;
sort year qtr;
merge year qtr using ${work}banxicopriceindices19732006_qtr.dta;

tab _merge;
tab year if _merge==1 | _merge==2;
keep if _merge==3;
drop _merge;

bysort year qtr: egen salmindf = max(salmin);
gen topsal = 10*salmindf if year<=1992 | (year==1993 & qtr<=2);
replace topsal = 18*salmindf if year==1993 & (qtr==3 | qtr==4);
replace topsal = 25*salmindf if year>=1994;

gen rtopsal = topsal/(cpi/100);
gen rsalmin = salmin/(cpi/100);

egen rtopsalmin_zonec = min(rtopsal);
bysort zona: egen rtopsalmin_byzone = min(rtopsal);
gen rsalmin91 = rsalmin if year==1991;
egen rsalmin91_zonea = max(rsalmin91); * 1991 zona A minimum, to be used for all zones, years;
bysort zona: egen rsalmin91_byzone = max(rsalmin91);

drop rtopsal ppi;

sort year qtr zona;

save ${work}minwages_wtopcodes.dta, replace;

**************************************************************************;
****************** prepare mpio_imss-mpio_inegi-zona concordance *********;
**************************************************************************;

clear;
use ${raw}munwzones_updated2;

tostring mpio_inegi, replace;
rename municipio mpio_imss;
gen str2 ent = string(ent_imss_bkp);
rename desc_municipio mpio_imss_desc;
rename minwgzone zona;

** add leading zeros to inegi codes;

replace mpio_inegi = "0" + mpio_inegi if length(mpio_inegi)==2;
replace mpio_inegi = "00" + mpio_inegi if length(mpio_inegi)==1;
replace ent = "0" + ent if length(ent)==1;

gen str5 entmpio_inegi = ent + mpio_inegi;

** add leading zero to imss mpio codes;

replace mpio_imss = "0" + mpio_imss if length(mpio_imss)==2;

* check that duplicates of mpio_imss also have duplicates of zona -- currently all do;
bysort mpio_imss: gen flag=1 if _n>1 & zona~=zona[_n-1];
count if flag==1;
drop flag;

* include one line per mpio_imss code -- only affects D.F. and all D.F. is zone A;
bysort mpio_imss: keep if _n==1;

drop if mpio_imss=="0";

order mpio_imss entmpio_inegi zona mpio_imss_desc;
keep mpio_imss entmpio_inegi zona mpio_imss_desc;

save ${work}mpio_imss_inegi_zona, replace;


**************************************************************************;
****************** prepare imss registro-zona concordance ****************;
**************************************************************************;

clear;
use ${work}mpio_imss_inegi_zona;

keep mpio_imss zona;

*check that duplicates of mpio_imss also have duplicates of zona;
*currently all do;
bys mpio_imss: gen flag=1 if _n>1 & zona~=zona[_n-1];
count if flag==1;
drop flag;

*include one line per mpio_imss code -- only affects D.F. and all D.F. is zone A;
bys mpio_imss: keep if _n==1;

sort mpio_imss;

tempfile municipio;
save `municipio', replace;

* merge into list of registros;

use ${work}imssdir, clear;
rename municipio mpio_imss;
sort mpio_imss;

merge mpio_imss using `municipio', uniqusing;
tab _merge;
drop if _merge==2;
drop _merge;


* fill in zona in cases where whole delegacion or entidad maps to a single zona;

replace zona = "C" if zona=="" & (inlist(delegacion, 1, 2, 3, 4, 5,
    6, 7, 10, 11, 13, 17, 18, 19, 21, 22, 23, 24, 25, 26, 28, 30, 33,
    34) | inlist(ent, 1, 4, 5, 6, 7, 10, 11, 12, 13, 16, 17, 18, 20,
    21, 22, 23, 24, 25, 27, 29, 31, 32) );

replace zona = "A" if zona=="" & (inlist(delegacion, 35, 36, 37, 38, 39, 40) |
    inlist(ent, 2, 3, 9) );

replace zona="A" if zona=="";

sort registro;
save ${work}imssdir_zona, replace;


*********************************************************************************;
********************** prepare IMSS-ENEU geographic concordances ****************;
*********************************************************************************;

*** get IMSS codes and crosswalk ready ***;

*imss/inegi mpio concordance;

clear;
use ${raw}imss_mpio_codes;

rename desc_municipio mpio_imss_desc;
rename mpio_inegi imss_inegi_municipio;
rename ent ent_imss_inegi;

*add leading zeros to inegi codes;
tostring imss_inegi_municipio ent_imss_inegi, replace;
replace imss_inegi_municipio="0"+imss_inegi_municipio
 if length(imss_inegi_municipio)==2;
replace imss_inegi_municipio="00"+imss_inegi_municipio
 if length(imss_inegi_municipio)==1;
replace ent_imss_inegi="0"+ent_imss_inegi
 if length(ent_imss_inegi)==1;

gen entmpio_inegi = ent_imss_inegi+imss_inegi_municipio;

keep mpio_imss mpio_imss_desc entmpio_inegi ent_imss_inegi;

sort entmpio_inegi;

save ${work}imss_inegi_mpio_codes, replace;


*** get ENEU codes ready ***;

* Merge geographic information based on INEGI codes for each municipality;

insheet using ${raw}metro_areas_n.csv, clear;
sort metro_area;
tempfile metro_areas_n;
save `metro_areas_n';

forval i=1/3
{;

 * eneu metropolitan area codes;

 * Note: The files metro_area`i' were created by cutting and pasting from the cobgeoX.doc
   files that came with the ENEU documentation.;

 insheet metro_area`i' entidad`i' municipio`i' localidad`i'
     using ${work}metro_areas`i'.csv, clear;

 * fill down missing geographic information;
 foreach var in metro_area entidad municipio
 {;
  replace `var'`i'=`var'`i'[_n-1] if `var'`i'=="";
 };

 rename metro_area`i' metro_area;

 * rename CD. DE MEXICO as MEXICO in order to merge;
 replace metro_area="MEXICO" if metro_area=="CD. DE MEXICO";

 * merge metro areas numbers;
 * _merge should only equal 2 or 3, keep only 3;
 sort metro_area;
 merge metro_area using `metro_areas_n';
 tab _merge;
 tab metro_area_n if _merge==2;
 tab municipio`i' if _merge==2;
 keep if _merge==3;
 drop _merge;

 rename metro_area metro_area`i';
 rename metro_area_n metro_area_n`i';

 *divide up numbers and names;
 gen entidad`i'_num = substr(entidad`i', 1, 2);
 gen entidad`i'_desc = substr(entidad`i', 3, .);
 gen municipio`i'_num = substr(municipio`i', 1, 3);
 gen municipio`i'_desc = substr(municipio`i', 4, .);

 * fix code for 15011 - Atenco;
 replace municipio`i'_num="011" if municipio`i'_num=="039"
     & `i'==1 & municipio`i'_desc==" Atenco";

 * entmpio_inegi code is 2 dig state and 3 dig municipality;
 gen entmpio_inegi= entidad`i'_num + municipio`i'_num;

 *drop duplicate municipalities that have multiple localities;
 *check to make sure duplicate ENTMPIO_INEGI codes are real duplicates;
 duplicates tag entmpio_inegi, gen(dup_entmpio_inegi);
 duplicates tag metro_area`i' entidad`i' municipio`i', gen(dup_all);
 gen dup_match=(dup_entmpio_inegi==dup_all);
 assert dup_match==1;

 duplicates drop entmpio_inegi, force;
 drop entidad`i' municipio`i' localidad`i';
 drop dup*;

 keep metro_area`i' metro_area_n`i' entidad`i'_num entidad`i'_desc municipio`i'_num
   municipio`i'_desc entmpio_inegi;

 sort entmpio_inegi;

 save ${tmp}metro_areas`i', replace;

};

*** merge IMSS and ENEU codes;

clear;
use ${work}imss_inegi_mpio_codes;

sort entmpio_inegi;
merge entmpio_inegi using ${tmp}metro_areas1, uniqusing;
tab _merge;
drop _merge;

sort entmpio_inegi;
merge entmpio_inegi using ${tmp}metro_areas2, uniqusing;
tab _merge;
drop _merge;

sort entmpio_inegi;
merge entmpio_inegi using ${tmp}metro_areas3, uniqusing;
tab _merge;
drop _merge;

* harmonize metro area codes;

* Note: There is a small number of municipios that changed
  metro areas in the ENEU, between Puebla and Tlaxcala. The ENEU
  ent/mpio codes are 29017, 29025, 29027, 29041. The IMSS mpio codes
  (some map to the same ENEU mpio code) are: P94, H76, 610, L08, 618,
  619, F14. These are municipios in the state of Tlaxcala but that
  appear on the map to be closer to Puebla. We assume that
  these are part of the Puebla metro area;

count if metro_area_n1 ~= metro_area_n2 & metro_area_n1~=. & metro_area_n2~=.;
count if metro_area_n1 ~= metro_area_n3 & metro_area_n1~=. & metro_area_n3~=.;
count if metro_area_n2 ~= metro_area_n3 & metro_area_n2~=. & metro_area_n3~=.;

gen metro_area_eneu = metro_area_n1;
replace metro_area_eneu = metro_area_n2 if metro_area_eneu==. & metro_area_n2~=.;
replace metro_area_eneu = metro_area_n3 if metro_area_eneu==. & metro_area_n3~=.;

gen metro_area_eneu_desc = metro_area1;
replace metro_area_eneu_desc = metro_area2 if metro_area_eneu_desc=="" & metro_area2~="";
replace metro_area_eneu_desc = metro_area3 if metro_area_eneu_desc=="" & metro_area3~="";

gen ent_eneu_desc = entidad1_desc;
replace ent_eneu_desc = entidad2_desc if ent_eneu_desc=="" & entidad2_desc~="";
replace ent_eneu_desc = entidad3_desc if ent_eneu_desc=="" & entidad3_desc~="";

gen mpio_eneu_desc = municipio1_desc;
replace mpio_eneu_desc = municipio2_desc if mpio_eneu_desc=="" & municipio2_desc~="";
replace mpio_eneu_desc = municipio3_desc if mpio_eneu_desc=="" & municipio3_desc~="";

replace metro_area_eneu = 4 if inlist(entmpio_inegi, "29017", "29025", "29027", "29041")==1;

replace metro_area_eneu = 1 if entmpio_inegi=="09001";

keep entmpio_inegi ent_eneu_desc mpio_eneu_desc mpio_imss mpio_imss_desc
    metro_area_eneu metro_area_eneu_desc;

order entmpio_inegi ent_eneu_desc mpio_eneu_desc mpio_imss mpio_imss_desc
    metro_area_eneu metro_area_eneu_desc;

sort entmpio_inegi;

save ${work}geocodes_imss_eneu, replace;

*************************************************************************;
******************** prepare imss contribution rates ********************;
*************************************************************************;

* This code calculates the contribution rates at the end of each
  quarter, since that is when the data is pulled from IMSS.;

*** employer contributions;

local fyr=1982;
local lyr=2005;
local nyrs = `lyr'-`fyr'+1;
local nobs = 4*`nyrs';

clear;
set obs `nobs';
gen year = ceil(_n/4)+`fyr'-1;
bys year: gen qtr = _n;

gen start_date = .;
replace start_date=mdy(3,31,year) if qtr==1;
replace start_date=mdy(6,30,year) if qtr==2;
replace start_date=mdy(9,30,year) if qtr==3;
replace start_date=mdy(12,31,year) if qtr==4;
format start_date %dN/D/Y;
gen keepdate=1;

append using ${raw}imss_employer_contrib_ev_2011_04_12.dta;

sort start_date keepdate;

*loop over variables;
foreach var of varlist sem-limit_cpda {;
	replace `var' = `var'[_n-1] if keepdate==1;
};

keep if keepdate==1;
drop keepdate start_date;

keep year qtr sem-limit_cpda;

sort year qtr;

save ${work}imss_employer_contrib, replace;

*** worker contributions;

local fyr=1982;
local lyr=2005;
local nyrs = `lyr'-`fyr'+1;
local nobs = 4*`nyrs';

clear;
set obs `nobs';
gen year = ceil(_n/4)+`fyr'-1;
bys year: gen qtr = _n;

gen start_date = .;
replace start_date=mdy(3,31,year) if qtr==1;
replace start_date=mdy(6,30,year) if qtr==2;
replace start_date=mdy(9,30,year) if qtr==3;
replace start_date=mdy(12,31,year) if qtr==4;
format start_date %dN/D/Y;
gen keepdate=1;

append using ${raw}imss_worker_contrib_ev_2011_04_12.dta;

sort start_date keepdate;

*loop over variables;
foreach var of varlist sem-limit_cpda {;
	replace `var' = `var'[_n-1] if keepdate==1;
};

keep if keepdate==1;
drop keepdate start_date;

keep year qtr sem-limit_cpda;

sort year qtr;

save ${work}imss_worker_contrib, replace;

*** government contributions;

local fyr=1982;
local lyr=2005;
local nyrs = `lyr'-`fyr'+1;
local nobs = 4*`nyrs';

clear;
set obs `nobs';
gen year = ceil(_n/4)+`fyr'-1;
bys year: gen qtr = _n;

gen start_date = .;
replace start_date=mdy(3,31,year) if qtr==1;
replace start_date=mdy(6,30,year) if qtr==2;
replace start_date=mdy(9,30,year) if qtr==3;
replace start_date=mdy(12,31,year) if qtr==4;
format start_date %dN/D/Y;
gen keepdate=1;

append using ${raw}imss_govt_contrib_tjk_2011_10_08.dta;

sort start_date keepdate;

*loop over variables;
foreach var of varlist sem-limit_cpda {;
	replace `var' = `var'[_n-1] if keepdate==1;
};

keep if keepdate==1;
drop keepdate start_date;

keep year qtr sem-limit_cpda;

sort year qtr;

save ${work}imss_government_contrib, replace;

*******************************************************************;
********************** prepare imss benefits **********************;
*******************************************************************;

** There are four benefits schemes.
	1 - Prior to 1991, which is the scheme we are most unsure of. For now
	we assume that the benefit is equal to 45% of the nominal salary in
	the last 5 years, and 1.5% of the nominal salary in the last 5 years for
	each year of work over 10 years
	2 - 1991 - 1992, which is based on the files below. The first file has
	minimum, maximum values, etc. while the second file has the benefit
	schedule.
	3 - 1992 - 1997, which is the same as 2 but with the 2% IRA contribution
	4 - post 1997, which is the IRA accounts;

local fyr=1982;
local lyr=2005;
local nyrs = `lyr'-`fyr'+1;
local nobs = 4*`nyrs';

clear;
set obs `nobs';
gen year = ceil(_n/4)+`fyr'-1;
bys year: gen qtr = _n;

gen start_date = .;
replace start_date=mdy(3,31,year) if qtr==1;
replace start_date=mdy(6,30,year) if qtr==2;
replace start_date=mdy(9,30,year) if qtr==3;
replace start_date=mdy(12,31,year) if qtr==4;
format start_date %dN/D/Y;
gen keepdate=1;

append using ${raw}imss_benefits_ev_2011_04_19.dta;

sort start_date keepdate;

*loop over variables;
foreach var of varlist min_yrs min_p min_p_rate max_p* sar {;
	replace `var' = `var'[_n-1] if keepdate==1;
};

keep if keepdate==1;
drop keepdate start_date;

keep year qtr min_yrs min_p min_p_rate max_p* sar;

sort year qtr;

save ${work}imss_benefits, replace;

log close;


